source("https://bioconductor.org/biocLite.R")
source("http://bioconductor.org/workflows.R")
workflowInstall("rnaseqGene")
biocLite("GO.db")
biocLite("org.Hs.eg.db")
biocLite("airway")
biocLite("DESeq2")
library(rnaseqGene)
library("airway")
data("airway")
se <- airway
se$dex <- relevel(se$dex, "untrt")
se$dex
## [1] untrt trt untrt trt untrt trt untrt trt
## Levels: untrt trt
library("DESeq2")
dds <- DESeqDataSet(se, design = ~ cell + dex)
nrow(dds)
## [1] 64102
dds <- dds[ rowSums(counts(dds)) > 1, ]
nrow(dds)
## [1] 29391
rld <- rlog(dds, blind=FALSE)
head(assay(rld), 3)
## SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516 SRR1039517 SRR1039520
## ENSG00000000003 9.385683 9.052608 9.516875 9.285338 9.839085 9.530311 9.663255
## ENSG00000000419 8.869616 9.138271 9.036116 9.075295 8.972126 9.131824 8.861534
## ENSG00000000457 7.961369 7.881385 7.824079 7.921490 7.751699 7.886441 7.957121
## SRR1039521
## ENSG00000000003 9.277699
## ENSG00000000419 9.060905
## ENSG00000000457 7.912123
par( mfrow = c( 1, 2 ) )
dds <- estimateSizeFactors(dds)
plot(log2(counts(dds, normalized=TRUE)[,1:2] + 1),
pch=16, cex=0.3)
plot(assay(rld)[,1:2],
pch=16, cex=0.3)
dds <- DESeq(dds)
(res <- results(dds))
## log2 fold change (MAP): dex trt vs untrt
## Wald test p-value: dex trt vs untrt
## DataFrame with 29391 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000000003 708.6021697 -0.37415193 0.09884432 -3.7852648 0.000153545 0.00128686
## ENSG00000000419 520.2979006 0.20206144 0.10974240 1.8412340 0.065587276 0.19676183
## ENSG00000000457 237.1630368 0.03616620 0.13834538 0.2614196 0.793768939 0.91372953
## ENSG00000000460 57.9326331 -0.08446385 0.24990676 -0.3379815 0.735377161 0.88385059
## ENSG00000000938 0.3180984 -0.08413904 0.15133427 -0.5559814 0.578223585 NA
## ... ... ... ... ... ... ...
## ENSG00000273485 1.2864477 0.03398815 0.2932360 0.1159071 0.9077261 NA
## ENSG00000273486 15.4525365 -0.09560732 0.3410333 -0.2803460 0.7792120 0.9062268
## ENSG00000273487 8.1632350 0.55007412 0.3725061 1.4766847 0.1397602 0.3389275
## ENSG00000273488 8.5844790 0.10515293 0.3683834 0.2854442 0.7753038 0.9039857
## ENSG00000273489 0.2758994 0.06947900 0.1512520 0.4593591 0.6459763 NA
library(GO.db)
library(org.Hs.eg.db)
res$go_id <- mapIds(org.Hs.eg.db,
keys=rownames(res),
column="GO",
keytype="ENSEMBL",
multiVals="first")
Remove genes with no GO annotation
res <- res[!is.na(res$go_id), ]
res <- res[res$go_id %in% keys(org.Hs.eg.db, keytype = "GO"), ]
Remove insignificant genes
res <- res[(! is.na(res$padj)) & res$padj <= 0.05, ]
Remove genes with small changes
res <- res[abs(res$log2FoldChange) >= 0.5, ]
term_class <- function(x, current = x, all_paths = TRUE, type = GOCCPARENTS) {
# Get immediate children of current taxon
parents = tryCatch({
names(AnnotationDbi::Term(as.list(type[x[1]])[[1]]))
}, error = function(e) {
c()
})
# only go down one path if desired
if (! all_paths) {
parents <- parents[1]
}
parents <- parents[parents != "all"]
if (is.null(parents)) {
return(c())
} else if (length(parents) == 0) {
return(paste0(collapse = "|", AnnotationDbi::Term(x)))
} else {
next_x <- lapply(parents, function(y) c(y, x))
# Run this function on them to get their output
child_output <- lapply(next_x, term_class, all_paths = all_paths, type = type)
output <- unlist(child_output, recursive = FALSE)
return(output)
}
}
cc_class <- lapply(res$go_id, term_class, all_paths = FALSE, type = GOCCPARENTS)
mf_class <- lapply(res$go_id, term_class, all_paths = FALSE, type = GOMFPARENTS)
bp_class <- lapply(res$go_id, term_class, all_paths = FALSE, type = GOBPPARENTS)
cc_res <- res[rep(1:nrow(res), sapply(cc_class, length)), ]
cc_res$class <- unlist(cc_class)
library(metacoder)
data <- parse_taxonomy_table(input = as.data.frame(cc_res),
taxon_col = c("class" = -1),
other_col_type = "obs_info",
class_sep = "\\|")
data$taxon_funcs <- c(data$taxon_funcs,
change = function(x, subset = NULL) {
vapply(obs(x),
function(i) mean(data$obs_data[i, ]$log2FoldChange, na.rm = TRUE),
numeric(1)) })
data %>%
heat_tree(node_label = name,
node_size = n_obs,
node_color = change,
node_color_trans = "linear",
node_color_range = diverging_palette(),
node_color_interval = c(-1, 1),
edge_color_trans = "linear",
edge_color_range = diverging_palette(),
edge_color_interval = c(-1, 1),
node_label_max = 300,
output_file = file.path(output_folder,
paste0("gene_expression--cellular_component",
output_format)))
bp_res <- res[rep(1:nrow(res), sapply(bp_class, length)), ]
bp_res$class <- unlist(bp_class)
library(metacoder)
data <- parse_taxonomy_table(input = as.data.frame(bp_res),
taxon_col = c("class" = -1),
other_col_type = "obs_info",
class_sep = "\\|")
data$taxon_funcs <- c(data$taxon_funcs,
change = function(x, subset = NULL) {
vapply(obs(x),
function(i) mean(data$obs_data[i, ]$log2FoldChange, na.rm = TRUE),
numeric(1)) })
data %>%
heat_tree(node_label = name,
node_size = n_obs,
node_color = change,
node_color_trans = "linear",
node_color_range = diverging_palette(),
node_color_interval = c(-1, 1),
edge_color_trans = "linear",
edge_color_range = diverging_palette(),
edge_color_interval = c(-1, 1),
node_label_max = 300,
output_file = file.path(output_folder,
paste0("gene_expression--biological_process",
output_format)))
mf_res <- res[rep(1:nrow(res), sapply(mf_class, length)), ]
mf_res$class <- unlist(mf_class)
library(metacoder)
data <- parse_taxonomy_table(input = as.data.frame(mf_res),
taxon_col = c("class" = -1),
other_col_type = "obs_info",
class_sep = "\\|")
data$taxon_funcs <- c(data$taxon_funcs,
change = function(x, subset = NULL) {
vapply(obs(x),
function(i) mean(data$obs_data[i, ]$log2FoldChange, na.rm = TRUE),
numeric(1)) })
data %>%
heat_tree(node_label = name,
node_size = n_obs,
node_color = change,
node_color_trans = "linear",
node_color_range = diverging_palette(),
node_color_interval = c(-1, 1),
edge_color_trans = "linear",
edge_color_range = diverging_palette(),
edge_color_interval = c(-1, 1),
node_label_max = 300,
output_file = file.path(output_folder,
paste0("gene_expression--molecular_function",
output_format)))
Comments